#!/usr/bin/env python3
"""
generate_report.py

Generate a Markdown report summarising the correlation analysis between
flip counts and Wilson‑loop fluctuations.

This script:
  1. Loads the YAML config to find the CSV path.
  2. Reads the CSV produced by run_correlation.py.
  3. Drops any NaN rows in 'r'.
  4. Detects whether the 'r_ci_lower'/'r_ci_upper' and 'p_value' columns
     are present.
  5. Plots one bar chart of r vs loop_size per gauge group, with error bars
     if available.
  6. Writes a UTF‑8–encoded Markdown report (report.md) embedding the plots.
"""

import os
import yaml
import pandas as pd
import matplotlib.pyplot as plt


def load_config(path):
    """Read the YAML file at ``path`` (as UTF-8) and return its parsed content.

    Parsing goes through ``yaml.safe_load``, so only plain YAML data types
    are constructed.
    """
    with open(path, mode="r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed


def _plot_group(sub, grp, has_ci, out_dir):
    """Save the bar chart of r vs. loop size for one gauge group.

    Args:
        sub: Rows of the results table for ``grp``, sorted by loop size.
        grp: Gauge-group label; used in the plot title and the file name.
        has_ci: Whether ``sub`` has the ``r_ci_lower``/``r_ci_upper`` columns.
        out_dir: Directory the PNG is written to.

    Returns:
        The file name (without directory) of the saved PNG.
    """
    bar_kwargs = {
        "x": "loop_size",
        "y": "r",
        "legend": False,
        "title": f"{grp} Correlation vs. Loop Size",
    }
    if has_ci:
        # Error bars are distances from r to each CI bound: [below, above].
        bar_kwargs["yerr"] = [
            sub["r"] - sub["r_ci_lower"],
            sub["r_ci_upper"] - sub["r"],
        ]
    ax = sub.plot.bar(**bar_kwargs)
    fig = ax.get_figure()
    img_name = f"{grp}_correlation.png"
    fig.savefig(os.path.join(out_dir, img_name), dpi=150, bbox_inches="tight")
    plt.close(fig)  # release the figure so open figures don't pile up per group
    return img_name


def _summary_table(df, has_ci, has_p):
    """Return the Markdown lines of the summary table (header, separator, rows)."""
    header = "| Gauge Group | Loop Size | r |"
    sep = "|:-----------:|:---------:|:-----:|"
    if has_ci:
        header += " 95% CI |"
        sep += ":------------------:|"
    if has_p:
        header += " p‑value |"
        sep += ":-------:|"

    table = [header, sep]
    for _, row in df.iterrows():
        cells = [
            f"{row['gauge_group']}",
            f"{int(row['loop_size'])}",
            f"{row['r']:.3f}",
        ]
        if has_ci:
            cells.append(f"[{row['r_ci_lower']:.3f}, {row['r_ci_upper']:.3f}]")
        if has_p:
            cells.append(f"{row['p_value']:.3e}")
        table.append("| " + " | ".join(cells) + " |")
    return table


def main():
    """Build the per-group correlation plots and ``report.md``.

    Reads ``config.yaml`` from the parent directory of this script, loads the
    CSV named by ``cfg["results"]["output_csv"]``, drops rows whose ``r`` is
    NaN, saves one bar chart per gauge group next to the CSV, and writes a
    UTF-8 Markdown report (summary table plus embedded plots) into that same
    directory.

    The optional columns ``r_ci_lower``/``r_ci_upper`` (error bars and a
    "95% CI" table column) and ``p_value`` (a p-value table column) are used
    only when present in the CSV.
    """
    # 1. Resolve repo and config paths
    here = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.abspath(os.path.join(here, os.pardir))
    cfg = load_config(os.path.join(repo_root, "config.yaml"))

    # 2. CSV + results directory
    csv_path = os.path.join(repo_root, cfg["results"]["output_csv"])
    results_dir = os.path.dirname(csv_path)
    os.makedirs(results_dir, exist_ok=True)

    # 3. Load and clean data
    df = pd.read_csv(csv_path)
    df = df.dropna(subset=["r"])  # skip undefined correlations

    # 4. Detect optional columns
    has_ci = all(col in df.columns for col in ("r_ci_lower", "r_ci_upper"))
    has_p = "p_value" in df.columns

    # 5. Generate plots per gauge group
    # Images are saved beside report.md, so each link target is the bare file
    # name: Markdown resolves relative image paths against the document's own
    # directory, not the repository root.
    plots = {}
    for grp in sorted(df["gauge_group"].unique()):
        sub = df[df["gauge_group"] == grp].sort_values("loop_size")
        plots[grp] = _plot_group(sub, grp, has_ci, results_dir)

    # 6. Build Markdown report
    lines = [
        "# Correlation Analysis Report",
        "",
        "This report summarizes the Pearson correlation between per‑link flip counts",
        "and local Wilson‑loop variances for each gauge group and loop size.",
        "",
        "## Summary Table",
        "",
    ]
    lines += _summary_table(df, has_ci, has_p)
    lines.append("")

    # Embed plots
    lines += ["## Correlation Plots", ""]
    for grp, img in plots.items():
        lines += [f"### {grp}", f"![{grp} Correlation]({img})", ""]

    # 7. Write with UTF-8
    report_path = os.path.join(results_dir, "report.md")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

    print(f"Report written: {report_path}")


# Generate the report only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()